BatchToSpace

将输入张量在批维度上分块并重新分布到空间维度,同时按照 crops 对输出空间范围进行裁剪。

\[\begin{split}\begin{aligned} N_{\text{out}} &= \frac{N}{b_h \times b_w}, \\ H_{\text{out}} &= b_h \times H - c_{\text{top}} - c_{\text{bottom}}, \\ W_{\text{out}} &= b_w \times W - c_{\text{left}} - c_{\text{right}}, \\ \text{output}[n, h, w, c] &= \text{input}[n', h', w', c] \end{aligned}\end{split}\]

其中 \(N, H, W, C\) 分别表示输入的 batch、高度、宽度和通道数;\(b_h, b_w\) 来源于 block_size;\(c_{*}\) 来源于 crops;\(n', h', w'\) 由 BatchToSpace 映射关系确定。

输入:
  • input - 输入数据地址。

  • in_shape - 输入形状,格式为 [batch, height, width, channel]

  • block_size - 分块因子,格式为 [block_h, block_w]

  • crops - 裁剪参数,格式为 [top, bottom, left, right]

  • core_mask(int, 可选) - 核掩码(仅适用于共享存储版本)。

输出:
  • output - 输出数据地址。

支持平台:

FT78NE MT7004

备注

  • FT78NE 支持 fp32、fp64、cplx64、cplx128、int16、int8、int32 数据类型。

  • MT7004 支持 fp32、fp16、cplx64、int16、int32 数据类型。

共享存储版本:

void i8_batch_to_space_s(const int8_t *input, int8_t *output, const int *in_shape, const int *block, const int *crops, int core_mask)
void i16_batch_to_space_s(const int16_t *input, int16_t *output, const int *in_shape, const int *block, const int *crops, int core_mask)
void i32_batch_to_space_s(const int *input, int *output, const int *in_shape, const int *block, const int *crops, int core_mask)
void hp_batch_to_space_s(const half *input, half *output, const int *in_shape, const int *block, const int *crops, int core_mask)
void fp_batch_to_space_s(const float *input, float *output, const int *in_shape, const int *block, const int *crops, int core_mask)
void dp_batch_to_space_s(const double *input, double *output, const int *in_shape, const int *block, const int *crops, int core_mask)
void c64_batch_to_space_s(const float *input, float *output, const int *in_shape, const int *block, const int *crops, int core_mask)
void c128_batch_to_space_s(const double *input, double *output, const int *in_shape, const int *block, const int *crops, int core_mask)

C 调用示例:

 1// 多核(共享存储)示例
 2#include <stdio.h>
 3
 4int main(int argc, char *argv[]) {
 5        float *input = (float *)0xA0000000;   // 输入在 DDR 空间
 6        float *output = (float *)0xB0000000;
 7        int input_shape[4] = {400, 2, 2, 3};
 8        int block_size[2] = {2, 2};
 9        int crops[4] = {0, 0, 0, 0};
10        int core_mask = 0xff;
11        fp_batch_to_space_s(input, output, input_shape, block_size, crops, core_mask);
12        return 0;
13}

私有存储版本:

void i8_batch_to_space_p(const int8_t *input, int8_t *output, const int *in_shape, const int *block, const int *crops)
void i16_batch_to_space_p(const int16_t *input, int16_t *output, const int *in_shape, const int *block, const int *crops)
void i32_batch_to_space_p(const int *input, int *output, const int *in_shape, const int *block, const int *crops)
void hp_batch_to_space_p(const half *input, half *output, const int *in_shape, const int *block, const int *crops)
void fp_batch_to_space_p(const float *input, float *output, const int *in_shape, const int *block, const int *crops)
void dp_batch_to_space_p(const double *input, double *output, const int *in_shape, const int *block, const int *crops)
void c64_batch_to_space_p(const float *input, float *output, const int *in_shape, const int *block, const int *crops)
void c128_batch_to_space_p(const double *input, double *output, const int *in_shape, const int *block, const int *crops)

C 调用示例:

 1// 单核(私有存储)示例
 2#include <stdio.h>
 3
 4int main(int argc, char *argv[]) {
 5        float *input = (float *)0x10000000;   // 输入在 L2 空间
 6        float *output = (float *)0x10010000;
 7        int input_shape[4] = {400, 2, 2, 3};
 8        int block_size[2] = {2, 2};
 9        int crops[4] = {0, 0, 0, 0};
10        fp_batch_to_space_p(input, output, input_shape, block_size, crops);
11        return 0;
12}